In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import operator
%matplotlib inline

This notebook investigates instances where the river is reversed and sewage is dumped into the lake. We will look at the rainfall before each of these events to see whether there is a correlation.


In [2]:
# Get River reversals
reversals = pd.read_csv('data/lake_michigan_reversals.csv')
reversals['start_date'] = pd.to_datetime(reversals['start_date'])
reversals.head()


Out[2]:
crcw date_raw obrien start_date total wilmette year
0 997.5 6/15-16/2015 0.0 2015-06-15 1164.7 167.2 2015
1 362.0 6/30-7/1/14 0.0 2014-06-30 525.0 163.0 2014
2 6104.7 4/18-19/13 3185.6 2013-04-18 10719.5 1429.2 2013
3 1716.2 7/24/2011 0.0 2011-07-24 2220.5 504.3 2011
4 0.0 5/29/2011 0.0 2011-05-29 107.0 107.0 2011

In [5]:
# Create the rainfall dataframe and build an hourly precipitation series (one value per hour)
rain_df = pd.read_csv('data/ohare_hourly_20160929.csv')
rain_df['datetime'] = pd.to_datetime(rain_df['datetime'])
rain_df = rain_df.set_index(pd.DatetimeIndex(rain_df['datetime']))
rain_df = rain_df['19700101':]
chi_rain_series = rain_df['HOURLYPrecip'].resample('1H', label='right').max()
chi_rain_series.head()


Out[5]:
1970-01-01 04:00:00    0.0
1970-01-01 05:00:00    NaN
1970-01-01 06:00:00    NaN
1970-01-01 07:00:00    0.0
1970-01-01 08:00:00    NaN
Freq: H, Name: HOURLYPrecip, dtype: float64
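
Note: the resampled series keeps NaN for hours with no precipitation report. The `.sum()` and rolling-sum calls below skip NaN, which is the behavior we rely on. If explicit zeros were preferred, a minimal (optional, not used here) alternative would be:

chi_rain_filled = chi_rain_series.fillna(0.0)  # illustrative only; the sums below already skip NaN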

In [6]:
# Cumulative rainfall over the `hours` hours ending at the end of the day that
# contains `timestamp` (so rain on the day of the event itself is included)
def cum_rain(timestamp, hours):
    end_of_day = (timestamp + timedelta(days=1)).replace(hour=0, minute=0)
    start_time = end_of_day - timedelta(hours=(hours-1))
    return chi_rain_series[start_time:end_of_day].sum()
    
t = pd.to_datetime('2015-06-15')
cum_rain(t, 240)


Out[6]:
4.6700000000000008
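
As a rough cross-check of cum_rain (an illustrative sketch, not part of the original analysis), the same 10-day total can be read off a 240-hour rolling sum of the hourly series, evaluated at the end-of-day timestamp that cum_rain uses:

# 240-hour rolling totals ending at each hourly timestamp (NaNs skipped)
ten_day_totals = chi_rain_series.rolling(window=240, min_periods=1).sum()
# cum_rain('2015-06-15', 240) sums the window ending at midnight on 2015-06-16
ten_day_totals.loc[pd.to_datetime('2015-06-16 00:00:00')]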

In [7]:
# Set the ten_day_rain field in reversals to the amount of rain that fell in the previous
# 10 days (including the day the lock was opened)
# TODO: Is there a more Pandas-idiomatic way to do this? (See the sketch after the output below.)
for index, reversal in reversals.iterrows():
    reversals.loc[index,'ten_day_rain'] = cum_rain(reversal['start_date'], 240)
reversals


Out[7]:
crcw date_raw obrien start_date total wilmette year ten_day_rain
0 997.5 6/15-16/2015 0.0 2015-06-15 1164.7 167.2 2015 4.67
1 362.0 6/30-7/1/14 0.0 2014-06-30 525.0 163.0 2014 4.91
2 6104.7 4/18-19/13 3185.6 2013-04-18 10719.5 1429.2 2013 7.40
3 1716.2 7/24/2011 0.0 2011-07-24 2220.5 504.3 2011 8.32
4 0.0 5/29/2011 0.0 2011-05-29 107.0 107.0 2011 5.17
5 5784.6 7/24/2010 0.0 2010-07-24 6534.9 750.3 2010 6.63
6 0.0 6/19/2009 0.0 2009-06-19 191.6 191.6 2009 5.68
7 0.0 3/8/2009 0.0 2009-03-08 143.1 143.1 2009 2.82
8 0.0 2/26-27/09 0.0 2009-02-26 78.9 78.9 2009 2.39
9 0.0 12/27-28/08 0.0 2008-12-27 460.8 460.8 2008 3.02
10 5438.2 9/13-16/08 2669.2 2008-09-13 11049.1 2941.7 2008 11.12
11 0.0 8/23-24/07 0.0 2007-08-23 224.0 224.0 2007 5.75
12 1296.4 8/22/2002 0.0 2002-08-22 1751.8 455.4 2002 7.26
13 0.0 10/13/2001 0.0 2001-10-13 90.7 90.7 2001 5.69
14 0.0 8/31/2001 0.0 2001-08-31 75.3 75.3 2001 9.26
15 883.1 8/2/2001 0.0 2001-08-02 1023.0 139.9 2001 2.12
16 0.0 6/13/1999 0.0 1999-06-13 9.7 9.7 1999 3.64
17 402.0 8/16-8/17/97 0.0 1997-08-16 559.0 157.0 1997 3.46
18 1947.0 2/20-2/22/97 1458.0 1997-02-20 4179.0 774.0 1997 0.74
19 519.0 7/17-7/18/96 1032.0 1996-07-17 1551.0 0.0 1996 2.41
20 86.0 11/27-11/28/90 224.0 1990-11-27 464.0 154.0 1990 3.30
21 0.0 8/17-8/18/90 0.0 1990-08-17 9.5 9.5 1990 2.24
22 208.0 5/9-5/10/90 0.0 1990-05-09 497.0 289.0 1990 4.83
23 0.0 8/3-8/4/89 0.0 1989-08-03 52.0 52.0 1989 1.75
24 0.0 8/25-8/26/87 0.0 1987-08-25 18.0 18.0 1987 4.08
25 986.0 8/13-8/14/87 0.0 1987-08-13 1957.0 971.0 1987 3.01
26 0.0 10/3/1986 0.0 1986-10-03 53.0 53.0 1986 5.53
27 0.0 8/6/1985 0.0 1985-08-06 58.0 58.0 1985 2.62
28 0.0 3/4/1985 0.0 1985-03-04 153.3 153.3 1985 4.28
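
One possible answer to the TODO above, as a sketch only: apply cum_rain across the start_date column instead of mutating the frame row by row. This should produce the same ten_day_rain column.

# Vectorized-ish alternative to the iterrows loop (sketch)
reversals['ten_day_rain'] = reversals['start_date'].apply(lambda ts: cum_rain(ts, 240))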

In [8]:
# Summary of the rainfall in the 10 days that precede these reversals
reversals['ten_day_rain'].describe(percentiles=[.25, .5, .75])


Out[8]:
count    29.000000
mean      4.624138
std       2.412203
min       0.740000
25%       2.820000
50%       4.280000
75%       5.690000
max      11.120000
Name: ten_day_rain, dtype: float64

Now we will look at any n-year storms that occurred during the 10 days prior to each reversal.


In [9]:
# Load the n-year storm thresholds (inches of rain for each duration / recurrence interval)
n_year_threshes = pd.read_csv('../../n-year/notebooks/data/n_year_definitions.csv')
n_year_threshes = n_year_threshes.set_index('Duration')
dur_str_to_hours = {
    '5-min':5/60.0,
    '10-min':10/60.0,
    '15-min':15/60.0,
    '30-min':0.5,
    '1-hr':1.0,
    '2-hr':2.0,
    '3-hr':3.0,
    '6-hr':6.0,
    '12-hr':12.0,
    '18-hr':18.0,
    '24-hr':24.0,
    '48-hr':48.0,
    '72-hr':72.0,
    '5-day':5*24.0,
    '10-day':10*24.0
}
n_s = [int(x.replace('-year','')) for x in reversed(list(n_year_threshes.columns.values))]
duration_strs = sorted(dur_str_to_hours.items(), key=operator.itemgetter(1), reverse=False)
n_year_threshes


Out[9]:
1-year 2-year 5-year 10-year 25-year 50-year 100-year
Duration
10-day 4.12 4.95 6.04 6.89 8.18 9.38 11.14
5-day 3.25 3.93 4.91 5.70 6.93 8.04 9.96
72-hr 2.93 3.55 4.44 5.18 6.32 7.41 8.78
48-hr 2.70 3.30 4.09 4.81 5.88 6.84 8.16
24-hr 2.51 3.04 3.80 4.47 5.51 6.46 7.58
18-hr 2.30 2.79 3.50 4.11 5.06 5.95 6.97
12-hr 2.18 2.64 3.31 3.89 4.79 5.62 6.59
6-hr 1.88 2.28 2.85 3.35 4.13 4.85 5.68
3-hr 1.60 1.94 2.43 2.86 3.53 4.14 4.85
2-hr 1.48 1.79 2.24 2.64 3.25 3.82 4.47
1-hr 1.18 1.43 1.79 2.10 2.59 3.04 3.56
30-min 0.93 1.12 1.41 1.65 2.04 2.39 2.80
15-min 0.68 0.82 1.03 1.21 1.49 1.75 2.05
10-min 0.55 0.67 0.84 0.98 1.21 1.42 1.67
5-min 0.30 0.36 0.46 0.54 0.66 0.78 0.91
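
The thresholds are indexed by duration, with one column per recurrence interval, so a single design-storm value can be looked up directly. For example:

# The 10-year, 24-hour storm threshold in inches (4.47 in the table above)
n_year_threshes.loc['24-hr', '10-year']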

In [10]:
# Return the highest n-year storm found in a given interval. The search starts at the
# 100-year threshold and decrements, so the first match is also the largest.
def find_n_year_storm(start_time, end_time):
    for n in n_s:
        n_index = n_s.index(n)
        next_n = n_s[n_index-1] if n_index != 0 else None

        for duration_tuple in reversed(duration_strs):

            duration_str = duration_tuple[0]
            low_thresh = n_year_threshes.loc[duration_str, str(n) + '-year']
            high_thresh = n_year_threshes.loc[duration_str, str(next_n) + '-year'] if next_n is not None else None
        
            duration = int(dur_str_to_hours[duration_str])
            sub_series = chi_rain_series[start_time: end_time]
            rolling = sub_series.rolling(window=int(duration), min_periods=0).sum()
        
            if high_thresh is not None:
                event_endtimes = rolling[(rolling >= low_thresh) & (rolling < high_thresh)].sort_values(ascending=False)
            else:
                event_endtimes = rolling[(rolling >= low_thresh)].sort_values(ascending=False)
            if len(event_endtimes) > 0:
                return {'inches': event_endtimes[0], 'n': n, 'end_time': event_endtimes.index[0], 'hours': duration}
    return None

start_time = pd.to_datetime('2008-09-04 01:00:00')
end_time = pd.to_datetime('2008-09-14 20:00:00')
find_n_year_storm(start_time, end_time)


Out[10]:
{'end_time': Timestamp('2008-09-14 13:00:00'),
 'hours': 240,
 'inches': 11.939999999999998,
 'n': 100}
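
find_n_year_storm returns None when no threshold is exceeded anywhere in the window, so downstream code can guard on that. A small illustrative sketch:

storm = find_n_year_storm(start_time, end_time)
if storm is not None:
    print('%d-year storm over %d hours, ending %s with %.2f inches' %
          (storm['n'], storm['hours'], storm['end_time'], storm['inches']))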

In [11]:
# Add a column to the reversals data frame showing any n-year storm that occurred before the reversal
# TODO: Is there a more Pandas-idiomatic way to do this? (See the sketch after the output below.)
for index, reversal in reversals.iterrows():
    end_of_day = (reversal['start_date'] + timedelta(days=1)).replace(hour=0, minute=0)
    start_time = end_of_day - timedelta(days=10)
    reversals.loc[index,'find_n_year_storm'] = str(find_n_year_storm(start_time, end_of_day))
reversals


Out[11]:
crcw date_raw obrien start_date total wilmette year ten_day_rain find_n_year_storm
0 997.5 6/15-16/2015 0.0 2015-06-15 1164.7 167.2 2015 4.67 {'hours': 120, 'n': 2, 'end_time': Timestamp('...
1 362.0 6/30-7/1/14 0.0 2014-06-30 525.0 163.0 2014 4.91 {'hours': 12, 'n': 2, 'end_time': Timestamp('2...
2 6104.7 4/18-19/13 3185.6 2013-04-18 10719.5 1429.2 2013 7.40 {'hours': 18, 'n': 25, 'end_time': Timestamp('...
3 1716.2 7/24/2011 0.0 2011-07-24 2220.5 504.3 2011 8.32 {'hours': 24, 'n': 100, 'end_time': Timestamp(...
4 0.0 5/29/2011 0.0 2011-05-29 107.0 107.0 2011 5.17 {'hours': 240, 'n': 2, 'end_time': Timestamp('...
5 5784.6 7/24/2010 0.0 2010-07-24 6534.9 750.3 2010 6.63 {'hours': 24, 'n': 50, 'end_time': Timestamp('...
6 0.0 6/19/2009 0.0 2009-06-19 191.6 191.6 2009 5.68 {'hours': 120, 'n': 5, 'end_time': Timestamp('...
7 0.0 3/8/2009 0.0 2009-03-08 143.1 143.1 2009 2.82 {'hours': 48, 'n': 1, 'end_time': Timestamp('2...
8 0.0 2/26-27/09 0.0 2009-02-26 78.9 78.9 2009 2.39 None
9 0.0 12/27-28/08 0.0 2008-12-27 460.8 460.8 2008 3.02 None
10 5438.2 9/13-16/08 2669.2 2008-09-13 11049.1 2941.7 2008 11.12 {'hours': 240, 'n': 50, 'end_time': Timestamp(...
11 0.0 8/23-24/07 0.0 2007-08-23 224.0 224.0 2007 5.75 {'hours': 120, 'n': 5, 'end_time': Timestamp('...
12 1296.4 8/22/2002 0.0 2002-08-22 1751.8 455.4 2002 7.26 {'hours': 240, 'n': 10, 'end_time': Timestamp(...
13 0.0 10/13/2001 0.0 2001-10-13 90.7 90.7 2001 5.69 {'hours': 48, 'n': 5, 'end_time': Timestamp('2...
14 0.0 8/31/2001 0.0 2001-08-31 75.3 75.3 2001 9.26 {'hours': 3, 'n': 50, 'end_time': Timestamp('2...
15 883.1 8/2/2001 0.0 2001-08-02 1023.0 139.9 2001 2.12 None
16 0.0 6/13/1999 0.0 1999-06-13 9.7 9.7 1999 3.64 {'hours': 72, 'n': 2, 'end_time': Timestamp('1...
17 402.0 8/16-8/17/97 0.0 1997-08-16 559.0 157.0 1997 3.46 None
18 1947.0 2/20-2/22/97 1458.0 1997-02-20 4179.0 774.0 1997 0.74 None
19 519.0 7/17-7/18/96 1032.0 1996-07-17 1551.0 0.0 1996 2.41 None
20 86.0 11/27-11/28/90 224.0 1990-11-27 464.0 154.0 1990 3.30 {'hours': 72, 'n': 1, 'end_time': Timestamp('1...
21 0.0 8/17-8/18/90 0.0 1990-08-17 9.5 9.5 1990 2.24 None
22 208.0 5/9-5/10/90 0.0 1990-05-09 497.0 289.0 1990 4.83 {'hours': 24, 'n': 2, 'end_time': Timestamp('1...
23 0.0 8/3-8/4/89 0.0 1989-08-03 52.0 52.0 1989 1.75 None
24 0.0 8/25-8/26/87 0.0 1987-08-25 18.0 18.0 1987 4.08 {'hours': 2, 'n': 10, 'end_time': Timestamp('1...
25 986.0 8/13-8/14/87 0.0 1987-08-13 1957.0 971.0 1987 3.01 {'hours': 3, 'n': 5, 'end_time': Timestamp('19...
26 0.0 10/3/1986 0.0 1986-10-03 53.0 53.0 1986 5.53 {'hours': 240, 'n': 2, 'end_time': Timestamp('...
27 0.0 8/6/1985 0.0 1985-08-06 58.0 58.0 1985 2.62 None
28 0.0 3/4/1985 0.0 1985-03-04 153.3 153.3 1985 4.28 {'hours': 240, 'n': 1, 'end_time': Timestamp('...
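
As with ten_day_rain, the loop above could likely be replaced with an apply over start_date (a sketch under the same assumptions; the helper name is ours):

def storm_before_reversal(ts):
    # 10-day window ending at midnight after the day the reversal started
    end_of_day = (ts + timedelta(days=1)).replace(hour=0, minute=0)
    return str(find_n_year_storm(end_of_day - timedelta(days=10), end_of_day))

reversals['find_n_year_storm'] = reversals['start_date'].apply(storm_before_reversal)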

In [12]:
# The column holds string representations, so compare against the string 'None'
no_n_year = reversals.loc[reversals['find_n_year_storm'] == 'None']
print("There are %s reversals without an n-year event" % len(no_n_year))
no_n_year


There are 9 reversals without an n-year event
Out[12]:
crcw date_raw obrien start_date total wilmette year ten_day_rain find_n_year_storm
8 0.0 2/26-27/09 0.0 2009-02-26 78.9 78.9 2009 2.39 None
9 0.0 12/27-28/08 0.0 2008-12-27 460.8 460.8 2008 3.02 None
15 883.1 8/2/2001 0.0 2001-08-02 1023.0 139.9 2001 2.12 None
17 402.0 8/16-8/17/97 0.0 1997-08-16 559.0 157.0 1997 3.46 None
18 1947.0 2/20-2/22/97 1458.0 1997-02-20 4179.0 774.0 1997 0.74 None
19 519.0 7/17-7/18/96 1032.0 1996-07-17 1551.0 0.0 1996 2.41 None
21 0.0 8/17-8/18/90 0.0 1990-08-17 9.5 9.5 1990 2.24 None
23 0.0 8/3-8/4/89 0.0 1989-08-03 52.0 52.0 1989 1.75 None
27 0.0 8/6/1985 0.0 1985-08-06 58.0 58.0 1985 2.62 None

In [13]:
# Take a closer look at the 1997 reversals
reversals.loc[reversals['year'] == 1997]


Out[13]:
crcw date_raw obrien start_date total wilmette year ten_day_rain find_n_year_storm
17 402.0 8/16-8/17/97 0.0 1997-08-16 559.0 157.0 1997 3.46 None
18 1947.0 2/20-2/22/97 1458.0 1997-02-20 4179.0 774.0 1997 0.74 None

In [15]:
# Reversals sorted by crcw volume, largest first
reversals.sort_values('crcw', ascending=False)


Out[15]:
crcw date_raw obrien start_date total wilmette year ten_day_rain find_n_year_storm
2 6104.7 4/18-19/13 3185.6 2013-04-18 10719.5 1429.2 2013 7.40 {'hours': 18, 'n': 25, 'end_time': Timestamp('...
5 5784.6 7/24/2010 0.0 2010-07-24 6534.9 750.3 2010 6.63 {'hours': 24, 'n': 50, 'end_time': Timestamp('...
10 5438.2 9/13-16/08 2669.2 2008-09-13 11049.1 2941.7 2008 11.12 {'hours': 240, 'n': 50, 'end_time': Timestamp(...
18 1947.0 2/20-2/22/97 1458.0 1997-02-20 4179.0 774.0 1997 0.74 None
3 1716.2 7/24/2011 0.0 2011-07-24 2220.5 504.3 2011 8.32 {'hours': 24, 'n': 100, 'end_time': Timestamp(...
12 1296.4 8/22/2002 0.0 2002-08-22 1751.8 455.4 2002 7.26 {'hours': 240, 'n': 10, 'end_time': Timestamp(...
0 997.5 6/15-16/2015 0.0 2015-06-15 1164.7 167.2 2015 4.67 {'hours': 120, 'n': 2, 'end_time': Timestamp('...
25 986.0 8/13-8/14/87 0.0 1987-08-13 1957.0 971.0 1987 3.01 {'hours': 3, 'n': 5, 'end_time': Timestamp('19...
15 883.1 8/2/2001 0.0 2001-08-02 1023.0 139.9 2001 2.12 None
19 519.0 7/17-7/18/96 1032.0 1996-07-17 1551.0 0.0 1996 2.41 None
17 402.0 8/16-8/17/97 0.0 1997-08-16 559.0 157.0 1997 3.46 None
1 362.0 6/30-7/1/14 0.0 2014-06-30 525.0 163.0 2014 4.91 {'hours': 12, 'n': 2, 'end_time': Timestamp('2...
22 208.0 5/9-5/10/90 0.0 1990-05-09 497.0 289.0 1990 4.83 {'hours': 24, 'n': 2, 'end_time': Timestamp('1...
20 86.0 11/27-11/28/90 224.0 1990-11-27 464.0 154.0 1990 3.30 {'hours': 72, 'n': 1, 'end_time': Timestamp('1...
24 0.0 8/25-8/26/87 0.0 1987-08-25 18.0 18.0 1987 4.08 {'hours': 2, 'n': 10, 'end_time': Timestamp('1...
26 0.0 10/3/1986 0.0 1986-10-03 53.0 53.0 1986 5.53 {'hours': 240, 'n': 2, 'end_time': Timestamp('...
27 0.0 8/6/1985 0.0 1985-08-06 58.0 58.0 1985 2.62 None
21 0.0 8/17-8/18/90 0.0 1990-08-17 9.5 9.5 1990 2.24 None
23 0.0 8/3-8/4/89 0.0 1989-08-03 52.0 52.0 1989 1.75 None
14 0.0 8/31/2001 0.0 2001-08-31 75.3 75.3 2001 9.26 {'hours': 3, 'n': 50, 'end_time': Timestamp('2...
16 0.0 6/13/1999 0.0 1999-06-13 9.7 9.7 1999 3.64 {'hours': 72, 'n': 2, 'end_time': Timestamp('1...
13 0.0 10/13/2001 0.0 2001-10-13 90.7 90.7 2001 5.69 {'hours': 48, 'n': 5, 'end_time': Timestamp('2...
11 0.0 8/23-24/07 0.0 2007-08-23 224.0 224.0 2007 5.75 {'hours': 120, 'n': 5, 'end_time': Timestamp('...
9 0.0 12/27-28/08 0.0 2008-12-27 460.8 460.8 2008 3.02 None
8 0.0 2/26-27/09 0.0 2009-02-26 78.9 78.9 2009 2.39 None
7 0.0 3/8/2009 0.0 2009-03-08 143.1 143.1 2009 2.82 {'hours': 48, 'n': 1, 'end_time': Timestamp('2...
6 0.0 6/19/2009 0.0 2009-06-19 191.6 191.6 2009 5.68 {'hours': 120, 'n': 5, 'end_time': Timestamp('...
4 0.0 5/29/2011 0.0 2011-05-29 107.0 107.0 2011 5.17 {'hours': 240, 'n': 2, 'end_time': Timestamp('...
28 0.0 3/4/1985 0.0 1985-03-04 153.3 153.3 1985 4.28 {'hours': 240, 'n': 1, 'end_time': Timestamp('...

In [ ]: